
/* THIS DO-FILE WAS WRITTEN IN MAY 2019 BY MARGHERITA COMOLA,
IT RUNS THE SIMULATIONS FOR THE COVERAGE PROBABILITIES WITH CLUSTERED DATA
(`Treatment Effects Accounting for Network Changes', Comola Prina, REStat) */

clear all
set more off

/********************************************************************************************
*********************************************************************************************
1) SETTINGS 
*********************************************************************************************
*********************************************************************************************/

/* observations */
global clusters="20"
global obs_c="50"
global obs=$clusters*$obs_c
global nall=$obs^2

/* number of replications*/
global simul = "300"

/* density of binary network at baseline */
global density = "0.1"

/* intervention-driven network changes: increase in link probability */
global ntk_change="0.05"

/* measurement error, in % points */
global prob_switch="1"

/* model's parameters */
global gamma="10"
global beta1="0.5"
global beta2="0.2"

/* variance of the error term in the two PE equations */
global var_PE="0.5"

/* variance of the (normal) random effect */
global var_re="1"

/*********************************************************************************************
*********************************************************************************************
2) PROGRAM 
*********************************************************************************************
*********************************************************************************************/

capture program drop simulation_ComolaPrina_cov
program define simulation_ComolaPrina_cov
drop _all

****************************************************************************
* GENERATE THE DATA
****************************************************************************/

/* DYADIC DATASET WITH CLUSTERED STRUCTURE */

/* random effects (village*time) */ 

clear 
set obs $clusters
generate village =_n
generate u_v0 = rnormal(0,$var_re)
generate u_v1 = rnormal(0,$var_re)
expand $obs_c
sort village
generate id =_n

/* treatment */

gen temp=runiform()
gen itt=(temp>=0.5)
drop temp

/* composite error term in PE equations */

gen er0=u_v0+ rnormal(0,$var_PE)
gen er1=u_v1+ rnormal(0,$var_PE)

mata: village= st_data(., "village")
mata: ITT= st_data(., "itt")
mata: er0 = st_data(., "er0")
mata: er1 = st_data(., "er1")

drop u_v0 u_v1 er0 er1

sort village id

save simul_ComolaPrina_cov.dta, replace

sort id 
foreach i in id itt village {
	rename `i' `i'_j
}

qui sum
save jjj.dta, replace

foreach i in id itt village {
	rename `i'_j `i'_i
}

cross using jjj.dta
capture erase jjj.dta

order id_i id_j itt_i itt_j

/* BINARY NETWORKS (Erdős–Rényi random graph) */ 

preserve
clear

/* binary network at baseline */

nwrandom $obs, prob($density) undirected

stack net1- net$obs, into (link0)
drop _stack
gen serial=_n
sort serial
save temp1.dta, replace
restore

/* intervention-driven network change */

preserve
clear

nwrandom $obs, prob($density+$ntk_change) undirected

stack net1- net$obs, into (link_temp)
drop _stack
gen serial=_n
sort serial
save temp2.dta, replace
restore

sort id_i id_j
gen serial=_n

sort serial
merge serial using temp1.dta
drop _merge

sort serial
merge serial using temp2.dta
drop _merge serial

/* binary network at endline */

gen link1=link0
gen temp=$ntk_change
replace link1=link_temp if (itt_i==1 | itt_j==1) & (id_i!= id_j) & temp!=0
drop temp link_temp

/* measurement error: need to be symmetric --> lower triangular matrix */

preserve

/* careful: need to compute the threshold within clusters */
drop if  village_i!= village_j

keep id_i id_j link0 link1
drop if id_i==id_j
drop if id_i>id_j

shufflevar link0 link1

/* switch indicator */

local treshold r(p$prob_switch)

gen random1=uniform() 
qui sum random1, detail
gen switch1=(random1<`treshold') 

gen random2=uniform() 
qui sum random2, detail
gen switch2=(random2<`treshold') 

drop link0 link1 random1 random2
ren id_i id1
ren id_j id2
sort id1 id2

save temp3.dta, replace

restore

/* re-merge and symmetrize */

ren id_i id1
ren id_j id2

sort id1 id2 
merge id1 id2 using temp3.dta
drop _merge

ren id1 id_i
ren id2 id_j 

ren id_j id1
ren id_i id2

ren switch1 switch1A
ren switch2 switch2A
ren link0_shuffled link0_shuffledA
ren link1_shuffled link1_shuffledA

sort id1 id2 
merge id1 id2 using temp3.dta
drop _merge

ren id1 id_j
ren id2 id_i 

replace switch1= switch1A if switch1==.
replace switch1= 0 if switch1==.
drop switch1A

replace switch2= switch2A if switch2==.
replace switch2= 0 if switch2==.
drop switch2A

replace link0_shuffled= link0_shuffledA if link0_shuffled==.
replace link0_shuffled= 0 if link0_shuffled==.
drop link0_shuffledA

replace link1_shuffled= link1_shuffledA if link1_shuffled==.
replace link1_shuffled= 0 if link1_shuffled==.
drop link1_shuffledA

sort id_i id_j

capture erase temp1.dta
capture erase temp2.dta
capture erase temp3.dta

/* add measurement errors to binary networks */

replace link0=link0_shuffled if switch1==1
replace link1=link1_shuffled if switch2==1

/* no links across clusters */

replace link0=0 if village_i!= village_j
replace link1=0 if village_i!= village_j

drop link0_shuffled link1_shuffled switch1 switch2
sort id_i id_j

/* binary matrix */

mata: G0 = st_data(., "link0")
mata: G0 = colshape(G0, $obs) 

mata: G1 = st_data(., "link1")
mata: G1 = colshape(G1, $obs) 

/* semi row standardization */

mata: G0_S=rowsum(G0,.)
mata: G0=G0:/G0_S

mata: G1_S=rowsum(G1,.)
mata: G1=G1:/G1_S

mata: mata drop G0_S G1_S

/* edit missing */

mata: G0=editmissing(G0, 0)
mata: G1=editmissing(G1, 0)

/* build delta_G */

mata: deltaG=G1-G0

mata: st_matrix("G0", G0)
mata: st_matrix("G1", G1)
mata: st_matrix("deltaG", deltaG)

/* re-import into Stata*/

preserve

clear
svmat G0
stack G01-G0$obs, into (G0_srs)
gen serial=_n
sort serial 
drop _stack
save temp0.dta, replace 

clear
svmat G1
stack G11-G1$obs, into (G1_srs)
gen serial=_n
sort serial 
drop _stack
save temp1.dta, replace 

clear
svmat deltaG
stack deltaG1-deltaG$obs, into (deltaG_srs)
gen serial=_n
sort serial 
drop _stack
save temp2.dta, replace 

restore

gen serial=_n

sort serial 
merge serial using temp0
drop _merge
capture erase temp0.dta

sort serial 
merge serial using temp1
drop _merge
capture erase temp1.dta

sort serial 
merge serial using temp2
drop serial _merge
capture erase temp2.dta

/* vectors of outcomes */ 

mata: A=I($obs)-$beta1*G0
mata: y0=lusolve(A,er0)

mata: A=I($obs)-$beta1*G0-$beta2*deltaG
mata: B=$gamma*ITT+er1
mata: y1=lusolve(A,B)
mata: mata drop A B

/*vectors of interest*/

mata: deltay=y1-y0
mata: G0_y0=G0*y0
mata: G0_y1=G0*y1
mata: G0_deltay=G0*deltay
mata: deltaG_y1=deltaG*y1

/*IVs of second order*/

mata: IV_2_1=G0*G0*ITT
mata: IV_2_2=deltaG*deltaG*ITT
mata: IV_2_3=G0*deltaG*ITT
mata: IV_2_4=deltaG*G0*ITT

/*IVs of third order*/

mata: IV_3_1=G0*G0*G0*ITT
mata: IV_3_2=deltaG*deltaG*deltaG*ITT
mata: IV_3_3=G0*G0*deltaG*ITT
mata: IV_3_4=deltaG*deltaG*G0*ITT
mata: IV_3_5=G0*deltaG*deltaG*ITT
mata: IV_3_6=deltaG*G0*G0*ITT
mata: IV_3_7=G0*deltaG*G0*ITT
mata: IV_3_8=deltaG*G0*deltaG*ITT

mata: results=(y0, y1, deltay, G0_y0, G0_y1, G0_deltay, deltaG_y1, IV_2_1, IV_2_2, IV_2_3, IV_2_4, IV_3_1, IV_3_2, IV_3_3, IV_3_4, IV_3_5, IV_3_6, IV_3_7, IV_3_8)
mata: st_matrix("result", results)

/*re-import into Stata*/

clear
svmat result

ren result1 y0
ren result2 y1
ren result3 deltay
ren result4 G0_y0
ren result5 G0_y1
ren result6 G0_deltay
ren result7 deltaG_y1
ren result8 IV_2_1
ren result9 IV_2_2
ren result10 IV_2_3
ren result11 IV_2_4
ren result12 IV_3_1
ren result13 IV_3_2
ren result14 IV_3_3
ren result15 IV_3_4
ren result16 IV_3_5
ren result17 IV_3_6
ren result18 IV_3_7
ren result19 IV_3_8

gen id=_n
sort id
save temp.dta, replace 

use simul_ComolaPrina_cov.dta, clear
sort id
merge id using temp.dta
drop _merge
capture erase temp.dta

/****************************************************************************
* ESTIMATE THE MODELS OF INTEREST
****************************************************************************/

global IV_list="IV_2_1 IV_2_2 IV_2_3 IV_2_4 IV_3_1 IV_3_2 IV_3_3 IV_3_4 IV_3_5 IV_3_6 IV_3_7 IV_3_8"

/* dynamic PE, IV, robust s.e. */

ivregress 2sls deltay itt (G0_deltay deltaG_y1 = $IV_list), nocons r

preserve
regsave, ci
keep if var=="itt"
drop var N
gen coverage = ($gamma>ci_lower & $gamma<ci_upper)
save temp_gamma_robust.dta, replace 
restore

preserve
regsave, ci
keep if var=="G0_deltay"
drop var N
gen coverage = ($beta1>ci_lower & $beta1<ci_upper)
save temp_beta1_robust.dta, replace 
restore

preserve
regsave, ci
keep if var=="deltaG_y1"
drop var N
gen coverage = ($beta2>ci_lower & $beta2<ci_upper)
save temp_beta2_robust.dta, replace 
restore

/* dynamic PE, IV, clustered robust s.e. */

ivregress 2sls deltay itt (G0_deltay deltaG_y1 = $IV_list), nocons cluster(village)

preserve
regsave, ci
keep if var=="itt"
drop var N
gen coverage = ($gamma>ci_lower & $gamma<ci_upper)
save temp_gamma_cluster.dta, replace 
restore

preserve
regsave, ci
keep if var=="G0_deltay"
drop var N 
gen coverage = ($beta1>ci_lower & $beta1<ci_upper)
save temp_beta1_cluster.dta, replace 
restore

preserve
regsave, ci
keep if var=="deltaG_y1"
drop var N
gen coverage = ($beta2>ci_lower & $beta2<ci_upper)
save temp_beta2_cluster.dta, replace 
restore

/* dynamic PE, IV, bootstrapped s.e.*/

ivregress 2sls deltay itt (G0_deltay deltaG_y1 = $IV_list), nocons vce(boot)

preserve
regsave, ci
keep if var=="itt"
drop var N
gen coverage = ($gamma>ci_lower & $gamma<ci_upper)
save temp_gamma_boot.dta, replace 
restore

preserve
regsave, ci
keep if var=="G0_deltay"
drop var N
gen coverage = ($beta1>ci_lower & $beta1<ci_upper)
save temp_beta1_boot.dta, replace 
restore

preserve
regsave, ci
keep if var=="deltaG_y1"
drop var N
gen coverage = ($beta2>ci_lower & $beta2<ci_upper)
save temp_beta2_boot.dta, replace 
restore

/* dynamic PE, IV, clustered bootstrapped s.e.*/

ivregress 2sls deltay itt (G0_deltay deltaG_y1 = $IV_list), nocons vce(boot, cl(village))

preserve
regsave, ci
keep if var=="itt"
drop var N
gen coverage = ($gamma>ci_lower & $gamma<ci_upper)
save temp_gamma_bootcl.dta, replace 
restore

preserve
regsave, ci
keep if var=="G0_deltay"
drop var N
gen coverage = ($beta1>ci_lower & $beta1<ci_upper)
save temp_beta1_bootcl.dta, replace 
restore

preserve
regsave, ci
keep if var=="deltaG_y1"
drop var N
gen coverage = ($beta2>ci_lower & $beta2<ci_upper)
save temp_beta2_bootcl.dta, replace 
restore

erase simul_ComolaPrina_cov.dta

end

/*********************************************************************************************
**********************************************************************************************
3) REPEAT THE PROCEDURE N TIMES AND SAVE THE QUANTITIES OF INTEREST
**********************************************************************************************
*********************************************************************************************/

/* blank data */

clear
set more off

set obs 0
gen id=_n

foreach var1 in gamma beta1 beta2 {
foreach var2 in robust cluster boot bootcl {
save results_cov_`var1'_`var2'.dta, replace 
}
} 

/* repeat the procedure and format appropriately */

set seed 19790703

forvalues i=1/$simul {

simulation_ComolaPrina_cov

foreach var1 in gamma beta1 beta2 {
foreach var2 in robust cluster boot bootcl {

use temp_`var1'_`var2'.dta, clear
gen id=`i'
append using results_cov_`var1'_`var2'.dta
save results_cov_`var1'_`var2'.dta, replace 
erase temp_`var1'_`var2'.dta

}
}

}

/********************************************************************************************
*********************************************************************************************
4) PRODUCE FINAL STATISTICS
*********************************************************************************************
********************************************************************************************/

foreach var1 in gamma beta1 beta2 {
foreach var2 in robust cluster boot bootcl {

use results_cov_`var1'_`var2'.dta, clear
display "`var1'_`var2'"
mean coverage 

}
} 


